head(readstate)
multi_cluster <- readstate %>%
group_by(state,months) %>%
summarise(` Maximum Temperature`= max(` Maximum Temperature`),
` Minimum Temperature`= min(` Minimum Temperature`),
` Precipitation` = mean(` Precipitation`),
` Average Temperature` = mean(` Average Temperature`),
` Palmer Drought Severity Index (PDSI)`
)
`summarise()` has grouped output by 'state', 'months'. You can override using
the `.groups` argument.
multi_cluster <- multi_cluster %>%
group_by(state) %>%
summarise(max(` Maximum Temperature`), min(` Minimum Temperature`), mean(` Precipitation`)
, mean(` Average Temperature`), mean(` Palmer Drought Severity Index (PDSI)`))
# ho cercato su internet precipitazioni medie annuali texas -> 51.2 inches -> divido per 12 -> 4.266667
multi_cluster[multi_cluster$state=="Texas",4] <- 4.266667
multi_cluster[multi_cluster$state=="Alaska",6] <- 0
which(is.na(multi_cluster))
integer(0)
dataset.e <- dist(multi_cluster, method = 'euclidean') #no method -> euclidean (default)
Warning in dist(multi_cluster, method = "euclidean") :
NAs introduced by coercion
dataset.es <- hclust(dataset.e, method = 'single')
dataset.ea <- hclust(dataset.e, method = 'average')
dataset.ec <- hclust(dataset.e, method = 'complete')
dataset.ew <- hclust(dataset.e, method = 'ward.D2')
#plot of dendograms
par(mfrow=c(1,3))
plot(dataset.es, main='euclidean-single', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ec, main='euclidean-complete', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ea, main='euclidean-average', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ew, main='euclidean-ward', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
#cutting dendograms: (with k=2 fixed)
cluster.ec <- cutree(dataset.ec, k=3) # euclidean-complete:
cluster.ea <- cutree(dataset.ea, k=3) # euclidean-average:
cluster.es <- cutree(dataset.es, k=3) # euclidean-simple:
cluster.ew <- cutree(dataset.ew, k=3) # euclidean-simple:
# Let's give a mark to the algorithms: did they aggregate coherently with
# the dissimilarity matrix or not?
# compute the cophenetic matrices
coph.es <- cophenetic(dataset.es)
coph.ec <- cophenetic(dataset.ec)
coph.ea <- cophenetic(dataset.ea)
coph.ew <- cophenetic(dataset.ew)
# compute cophenetic coefficients (<-> corr(D,C)) D:dist mat; C:coph dist mat
es <- cor(dataset.e, coph.es)
ec <- cor(dataset.e, coph.ec)
ea <- cor(dataset.e, coph.ea)
ew <- cor(dataset.e, coph.ew)
c("Eucl-Single"=es,"Eucl-Compl."=ec,"Eucl-Ave."=ea, "Eucl-War."=ew)
Eucl-Single Eucl-Compl. Eucl-Ave. Eucl-War.
0.5302177 0.6771974 0.7414933 0.6742687
Ha più senso utilizzare Euclidean-Average con k=3
library(plotly)
us_data <- map_data("state")
df <- data.frame(
state = tolower(multi_cluster$state),
values = cluster.ew
)
library(usmap)
plot_usmap(data = df) + labs(title = "Multivariate cluster (all cov)")
#Now standardize data
std_multi_cluster<- scale(multi_cluster[,-1])
dataset.e <- dist(std_multi_cluster, method = 'euclidean') #no method -> euclidean (default)
dataset.es <- hclust(dataset.e, method = 'single')
dataset.ea <- hclust(dataset.e, method = 'average')
dataset.ec <- hclust(dataset.e, method = 'complete')
dataset.ew <- hclust(dataset.e, method = 'ward.D2')
#plot of dendograms
par(mfrow=c(1,3))
plot(dataset.es, main='euclidean-single', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ec, main='euclidean-complete', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ea, main='euclidean-average', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ew, main='euclidean-ward', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
#cutting dendograms: (with k=2 fixed)
cluster.ec <- cutree(dataset.ec, k=3)
cluster.ea <- cutree(dataset.ea, k=4) # va bene 3 o 4
cluster.es <- cutree(dataset.es, k=3)
cluster.ew <- cutree(dataset.ew, k=4) #va bene 3 o 4
# Let's give a mark to the algorithms: did they aggregate coherently with
# the dissimilarity matrix or not?
# compute the cophenetic matrices
coph.es <- cophenetic(dataset.es)
coph.ec <- cophenetic(dataset.ec)
coph.ea <- cophenetic(dataset.ea)
coph.ew <- cophenetic(dataset.ew)
# compute cophenetic coefficients (<-> corr(D,C)) D:dist mat; C:coph dist mat
es <- cor(dataset.e, coph.es)
ec <- cor(dataset.e, coph.ec)
ea <- cor(dataset.e, coph.ea)
ew <- cor(dataset.e, coph.ew)
c("Eucl-Single"=es,"Eucl-Compl."=ec,"Eucl-Ave."=ea, "Eucl-War."=ew)
Eucl-Single Eucl-Compl. Eucl-Ave. Eucl-War.
0.6070141 0.7259835 0.7478126 0.5723762
Anche in questo caso E-A ma k=2 migliore (ignoreremo ciò e usiamo k=3)
library(plotly)
us_data <- map_data("state")
df <- data.frame(
state = tolower(multi_cluster$state),
values = cluster.ew
)
library(usmap)
plot_usmap(data = df) + labs(title = "Std Multivariate cluster (all cov)")
std_multi_cluster<- scale(multi_cluster[,-c(1,6)])
dataset.e <- dist(std_multi_cluster, method = 'euclidean') #no method -> euclidean (default)
dataset.es <- hclust(dataset.e, method = 'single')
dataset.ea <- hclust(dataset.e, method = 'average')
dataset.ec <- hclust(dataset.e, method = 'complete')
dataset.ew <- hclust(dataset.e, method = 'ward.D2')
#plot of dendograms
par(mfrow=c(1,3))
plot(dataset.es, main='euclidean-single', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ec, main='euclidean-complete', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ea, main='euclidean-average', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ew, main='euclidean-ward', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
#cutting dendograms: (with k=2 fixed)
cluster.ec <- cutree(dataset.ec, k=3) # euclidean-complete:
cluster.ea <- cutree(dataset.ea, k=3) # euclidean-average:
cluster.es <- cutree(dataset.es, k=3) # euclidean-simple:
cluster.ew <- cutree(dataset.ew, k=4) # euclidean-simple:
# Let's give a mark to the algorithms: did they aggregate coherently with
# the dissimilarity matrix or not?
# compute the cophenetic matrices
coph.es <- cophenetic(dataset.es)
coph.ec <- cophenetic(dataset.ec)
coph.ea <- cophenetic(dataset.ea)
coph.ew <- cophenetic(dataset.ew)
# compute cophenetic coefficients (<-> corr(D,C)) D:dist mat; C:coph dist mat
es <- cor(dataset.e, coph.es)
ec <- cor(dataset.e, coph.ec)
ea <- cor(dataset.e, coph.ea)
ew <- cor(dataset.e, coph.ew)
c("Eucl-Single"=es,"Eucl-Compl."=ec,"Eucl-Ave."=ea, "Eucl-War."=ew)
Eucl-Single Eucl-Compl. Eucl-Ave. Eucl-War.
0.5942218 0.5589432 0.7714117 0.5725603
library(plotly)
us_data <- map_data("state")
df <- data.frame(
state = tolower(multi_cluster$state),
values = cluster.ew
)
library(usmap)
plot_usmap(data = df) + labs(title = "Std Multivariate cluster (without PDSI)")
plot_ly(x=std_multi_cluster[,1], y=std_multi_cluster[,2], z=std_multi_cluster[,3], color=cluster.ew, pch=19)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
No scatter3d mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Warning: 'scatter3d' objects don't have these attributes: 'pch'
Valid attributes include:
'connectgaps', 'customdata', 'customdatasrc', 'error_x', 'error_y', 'error_z', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'projection', 'scene', 'showlegend', 'stream', 'surfaceaxis', 'surfacecolor', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
No scatter3d mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Warning: 'scatter3d' objects don't have these attributes: 'pch'
Valid attributes include:
'connectgaps', 'customdata', 'customdatasrc', 'error_x', 'error_y', 'error_z', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'projection', 'scene', 'showlegend', 'stream', 'surfaceaxis', 'surfacecolor', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'